{-# LANGUAGE OverloadedStrings  #-}

module Features 
    ( features
    , maybeFeatures
    , inputFeatures
    , outputFeatures
    , indexFeatures
    , eval 
    )
where

import qualified Helper.Text as Text
import Helper.Text (Txt)
import qualified Helper.ListZipper as LZ
import Helper.ListZipper (ListZipper,at)
import CorpusReader (Token,fromWords)
import qualified Data.Char as Char
import Data.List (group,sort)
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap
import Helper.Atom (MonadAtoms,AtomTable,from,toAtom,maybeToAtom)
import Data.Maybe (catMaybes,isNothing)
import Control.Monad (liftM2)
import Data.Monoid (mappend)
import Config 
import qualified Data.Vector.Unboxed as V
import FeatureTemplate (Feature(..))
import Data.Word (Word,Word64)
import qualified Hashable as H
import Data.Int

toAtom' :: Int -> Txt -> Int
toAtom' size s = fromIntegral ((H.hash s::Word64) `rem` fromIntegral size)

iNDEX_SUFFIX :: Txt
iNDEX_SUFFIX="::index"
iNPUT_PREFIX :: Txt
iNPUT_PREFIX="in:"
oUTPUT_PREFIX :: Txt
oUTPUT_PREFIX="out:"
nULL_MARK :: Txt
nULL_MARK = "<NULL>"


eval :: ListZipper Token -> Feature -> [Maybe Txt]
eval z (Cell r c)       = case z `at` r of 
                            [] -> [Nothing]
                            fs -> [fs `index` c] 
eval z (Rect r c r' c') = concat [ eval z (Cell i j) | i <- [r..r'] 
                                                     , j <- [c..c'] ]
eval z (Row r)          = concat [ eval z (Cell r j) 
                                   | j <- [0..length (z `at` 0)-1] ]
eval z (MarkNull f)     = [ maybe (Just nULL_MARK) Just fi  
                               | fi <- eval z f ]
eval z (Index f)        = [ fi+++Just iNDEX_SUFFIX | fi <- eval z f ]
eval z (Cat fs)         = concatMap (eval z) fs
eval z (Cart f f')      = [ fmap Text.normalize $ fi +++ Just "," +++ fi' 
                                | fi <- eval z f , fi' <- eval z f' ]
eval z (Lower f)        = [ fmap (Text.map Char.toLower) fi | fi <- eval  z f ]
eval z (Suffix i f)     = [   fmap (Text.reverse
                                  . Text.take (fromIntegral i)
                                  . Text.reverse )
                                  $ fi | fi <- eval z f ]
eval z (Prefix i f)     = [ fmap (Text.take (fromIntegral i)) $ fi 
                                | fi <- eval z f ]
eval z (WordShape f)    = [ fmap (spellingSpec) fi | fi <- eval z f ]     

spellingSpec  = Text.fromString 
                 . map  (\(x:xs) -> x) 
                 . group 
                 . map collapse 
                 . Text.toString

collapse c | Char.isAlpha c && Char.isUpper c = 'X'
           | Char.isAlpha c && Char.isLower c = 'x'
           | Char.isDigit c              = '0'
           | c == '-'               = '-'
           | c == '_'               = '_'
           | otherwise              = '*'

indexFeatures :: AtomTable -> IntSet.IntSet 
indexFeatures  =
     IntMap.keysSet 
           . IntMap.filter (iNDEX_SUFFIX `Text.isSuffixOf`) 
           . from 

inputFeatures :: Config -> ListZipper Token  -> [Txt]
inputFeatures config x =
    catMaybes . prefixIndex iNPUT_PREFIX  . eval x . featureTemplate $ config

outputFeatures :: [Txt] -> [Txt]
outputFeatures ys = catMaybes . prefixIndex oUTPUT_PREFIX . map Just $
    case ys of
      (y:y':_) -> [y,y`Text.append`y']
      [y]      -> [y]
      []       -> []

features :: (Functor m, MonadAtoms m) => Maybe (Int,Int) -> Config 
         -> ListZipper Token 
         -> m (V.Vector Int)
features bounds config = do 
  case (flagHash . flags $ config,bounds) of
    (True,Just (_,size))  ->   
               return 
             . V.fromList 
             . map (toAtom' size)
             . inputFeatures config
    (False,Nothing) ->    
                fmap V.fromList 
              . mapM toAtom
              . inputFeatures config

maybeFeatures :: (Functor m, MonadAtoms m) => Maybe (Int,Int) -> Config 
         -> ListZipper Token 
         -> m (V.Vector Int)
maybeFeatures bounds config = do
    case (flagHash . flags $ config,bounds) of
      (True,Just _)  ->   features bounds config
      (False,Nothing) ->    
                fmap V.fromList 
              . fmap catMaybes
              . mapM maybeToAtom
              . inputFeatures config
prefixIndex :: Txt -> [Maybe Txt] -> [Maybe Txt]
prefixIndex str = zipWith (\i x -> Just str +++ Just (Text.show i) 
                                            +++ Just "=" 
                                            +++ x ) 
                          [1..]

(+++) = liftM2 (\s t -> Text.concat [s,t])

index [] _     = Nothing
index (x:_)  0 = Just x
index (_:xs) i = index xs (i-1)


sent = LZ.fromList [["I","pro"],["like","v"],["Ike","pn"]] :: ListZipper Token
